library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.1
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.1
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(astsa)
## Warning: package 'astsa' was built under R version 4.1.1
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
library(xts)
## Warning: package 'xts' was built under R version 4.1.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(tseries)
## Warning: package 'tseries' was built under R version 4.1.2
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.1.2
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v fma 2.4 v expsmooth 2.3
## Warning: package 'fma' was built under R version 4.1.2
## Warning: package 'expsmooth' was built under R version 4.1.2
##
##
## Attaching package: 'fpp2'
## The following object is masked from 'package:astsa':
##
## oil
library(fma)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.1.2
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.1.2
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.1.3
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.1.2
## Loading required package: PerformanceAnalytics
## Warning: package 'PerformanceAnalytics' was built under R version 4.1.2
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## == Need to Learn tidyquant? ====================================================
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
https://rstudio-pubs-static.s3.amazonaws.com/372052_fdab30947be143dfb352094793078a95.html
com=read.csv("SPCOMP.csv")
com$Year <- as.Date(com$Year,"%Y-%m-%d")
data=com[com$Year<"2021-10-31",]
data = data.frame(data)
str(com)
## 'data.frame': 1814 obs. of 10 variables:
## $ Year : Date, format: "2022-02-28" "2022-01-31" ...
## $ S.P.Composite : num 4587 4574 4675 4667 4461 ...
## $ Dividend : num NA NA 60.4 60 59.6 ...
## $ Earnings : num NA NA NA NA NA ...
## $ CPI : num 279 281 279 278 277 ...
## $ Long.Interest.Rate : num 1.96 1.76 1.47 1.56 1.58 1.37 1.28 1.32 1.52 1.62 ...
## $ Real.Price : num 4589 4577 4686 4692 4507 ...
## $ Real.Dividend : num NA NA 60.5 60.3 60.3 ...
## $ Real.Earnings : num NA NA NA NA NA ...
## $ Cyclically.Adjusted.PE.Ratio: num 37.8 37.7 38.7 38.8 37.3 ...
fig <- data %>%
plot_ly(x = ~Year, y = ~S.P.Composite, type = 'scatter',
mode = 'lines',name = 'S.P.Composite')
fig <- fig %>% add_trace(y = ~Dividend, x = ~Year, name = 'Dividend')
fig <- fig %>% add_trace(y = ~CPI, x = ~Year, name = 'CPI')
fig
lm.fit=lm(S.P.Composite~.-Year,data)
summary(lm.fit)
##
## Call:
## lm(formula = S.P.Composite ~ . - Year, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -147.88 -23.27 0.12 19.92 309.31
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 204.501409 5.850495 34.955 <2e-16 ***
## Dividend 31.155486 0.928404 33.558 <2e-16 ***
## Earnings 5.008916 0.373970 13.394 <2e-16 ***
## CPI -1.523031 0.044066 -34.562 <2e-16 ***
## Long.Interest.Rate -0.152329 0.605320 -0.252 0.801
## Real.Price 0.778353 0.007818 99.559 <2e-16 ***
## Real.Dividend -17.025652 0.639535 -26.622 <2e-16 ***
## Real.Earnings -2.794044 0.272902 -10.238 <2e-16 ***
## Cyclically.Adjusted.PE.Ratio -9.465309 0.313760 -30.167 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.61 on 1680 degrees of freedom
## (120 observations deleted due to missingness)
## Multiple R-squared: 0.9966, Adjusted R-squared: 0.9966
## F-statistic: 6.222e+04 on 8 and 1680 DF, p-value: < 2.2e-16
#After fitting the linear model, only Long.Interest.Rate is non significant
lm.fit2=lm(S.P.Composite~.-Year-Long.Interest.Rate,data)
summary(lm.fit2)
##
## Call:
## lm(formula = S.P.Composite ~ . - Year - Long.Interest.Rate, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -147.86 -23.14 0.07 19.90 308.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 203.995409 5.492581 37.14 <2e-16 ***
## Dividend 31.162646 0.927710 33.59 <2e-16 ***
## Earnings 5.034577 0.359700 14.00 <2e-16 ***
## CPI -1.528510 0.038301 -39.91 <2e-16 ***
## Real.Price 0.778329 0.007815 99.59 <2e-16 ***
## Real.Dividend -17.021607 0.639154 -26.63 <2e-16 ***
## Real.Earnings -2.812630 0.262645 -10.71 <2e-16 ***
## Cyclically.Adjusted.PE.Ratio -9.445221 0.303351 -31.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.6 on 1681 degrees of freedom
## (120 observations deleted due to missingness)
## Multiple R-squared: 0.9966, Adjusted R-squared: 0.9966
## F-statistic: 7.114e+04 on 7 and 1681 DF, p-value: < 2.2e-16
#Residual standard error is decreased by 0.01
require(regclass)
## Loading required package: regclass
## Warning: package 'regclass' was built under R version 4.1.3
## Loading required package: bestglm
## Warning: package 'bestglm' was built under R version 4.1.3
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 4.1.2
## Loading required package: VGAM
## Warning: package 'VGAM' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:tidyr':
##
## fill
## Loading required package: rpart
## Warning: package 'rpart' was built under R version 4.1.3
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 4.1.3
## randomForest 4.7-1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
## Important regclass change from 1.3:
## All functions that had a . in the name now have an _
## all.correlations -> all_correlations, cor.demo -> cor_demo, etc.
##
## Attaching package: 'regclass'
## The following object is masked from 'package:TSstudio':
##
## build_model
VIF(lm.fit2)
## Dividend Earnings
## 138.601072 121.850462
## CPI Real.Price
## 9.023757 36.054286
## Real.Dividend Real.Earnings
## 55.202288 69.189943
## Cyclically.Adjusted.PE.Ratio
## 4.665793
#Dividend and Earnings has the highest scores.
lm.fit3=lm(S.P.Composite~.-Year-Long.Interest.Rate- Dividend-Earnings,data)
summary(lm.fit3)
##
## Call:
## lm(formula = S.P.Composite ~ . - Year - Long.Interest.Rate -
## Dividend - Earnings, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308.084 -64.234 1.778 96.994 287.628
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 154.76575 15.38787 10.058 < 2e-16 ***
## CPI -0.24540 0.09674 -2.537 0.0113 *
## Real.Price 1.29158 0.01818 71.059 < 2e-16 ***
## Real.Dividend -8.79039 0.90671 -9.695 < 2e-16 ***
## Real.Earnings -1.78210 0.28580 -6.236 5.68e-10 ***
## Cyclically.Adjusted.PE.Ratio -24.71080 0.77101 -32.050 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 117.8 on 1683 degrees of freedom
## (120 observations deleted due to missingness)
## Multiple R-squared: 0.9716, Adjusted R-squared: 0.9715
## F-statistic: 1.153e+04 on 5 and 1683 DF, p-value: < 2.2e-16
res <- ts(lm.fit$residuals)
autoplot(res)
acf(res)
pacf(res)
#From the plot of residuals, adf and pacf plots, it is clear that it is volatile and non-stationary in nature
dif.res<-diff(res)
ggAcf(dif.res)
ggPacf(dif.res) #p=1,q=1
ARIMA.c=function(p1,p2,q1,q2,data){
temp=c()
d=1
i=1
temp= data.frame()
ls=matrix(rep(NA,6*30),nrow=30)
for (p in p1:p2)#
{
for(q in q1:q2)#
{
for(d in 0:2)#
{
if(p+d+q<=6)
{
model<- Arima(data,order=c(p,d,q))
ls[i,]= c(p,d,q,model$aic,model$bic,model$aicc)
i=i+1
print(i)
}
}
}
}
temp= as.data.frame(ls)
names(temp)= c("p","d","q","AIC","BIC","AICc")
temp
}
output <- ARIMA.c(0,2,0,2,data=res)
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
output
## p d q AIC BIC AICc
## 1 0 0 0 17300.72 17311.59 17300.73
## 2 0 1 0 11645.58 11651.01 11645.58
## 3 0 2 0 12374.91 12380.34 12374.91
## 4 0 0 1 15309.77 15326.07 15309.78
## 5 0 1 1 11563.57 11574.43 11563.57
## 6 0 2 1 11624.18 11635.04 11624.18
## 7 0 0 2 14059.31 14081.03 14059.33
## 8 0 1 2 11561.57 11577.87 11561.59
## 9 0 2 2 11566.03 11582.33 11566.05
## 10 1 0 0 11660.39 11676.69 11660.41
## 11 1 1 0 11559.33 11570.20 11559.34
## 12 1 2 0 12103.32 12114.18 12103.33
## 13 1 0 1 11574.00 11595.73 11574.02
## 14 1 1 1 11561.33 11577.62 11561.34
## 15 1 2 1 11561.88 11578.17 11561.90
## 16 1 0 2 11570.07 11597.22 11570.10
## 17 1 1 2 11563.59 11585.32 11563.61
## 18 1 2 2 11563.88 11585.61 11563.91
## 19 2 0 0 11567.28 11589.01 11567.30
## 20 2 1 0 11561.33 11577.62 11561.34
## 21 2 2 0 11959.12 11975.41 11959.13
## 22 2 0 1 11568.97 11596.13 11569.00
## 23 2 1 1 11553.68 11575.40 11553.70
## 24 2 2 1 11563.88 11585.60 11563.90
## 25 2 0 2 11539.69 11572.29 11539.74
## 26 2 1 2 11552.87 11580.02 11552.90
## 27 2 2 2 11565.87 11593.02 11565.91
## 28 NA NA NA NA NA NA
## 29 NA NA NA NA NA NA
## 30 NA NA NA NA NA NA
#The lowest AIC value is for model (2,0,2)
output[which.min(output$AIC),] #1,1,1
## p d q AIC BIC AICc
## 25 2 0 2 11539.69 11572.29 11539.74
#The lowest BIC value is for model (1,1,0)
output[which.min(output$BIC),] #1,1,1
## p d q AIC BIC AICc
## 11 1 1 0 11559.33 11570.2 11559.34
sarima(res,1,1,1)
## initial value 2.028557
## iter 2 value 2.012006
## iter 3 value 2.002620
## iter 4 value 2.002594
## iter 5 value 2.002583
## iter 6 value 2.002523
## iter 7 value 2.002476
## iter 8 value 2.002454
## iter 9 value 2.002452
## iter 10 value 2.002451
## iter 10 value 2.002451
## iter 10 value 2.002451
## final value 2.002451
## converged
## initial value 2.003747
## iter 2 value 2.003745
## iter 3 value 2.003737
## iter 4 value 2.003713
## iter 5 value 2.003706
## iter 6 value 2.003705
## iter 7 value 2.003705
## iter 8 value 2.003705
## iter 9 value 2.003705
## iter 10 value 2.003705
## iter 11 value 2.003704
## iter 11 value 2.003704
## iter 11 value 2.003704
## final value 2.003704
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ma1 constant
## 0.2293 -0.0040 -0.1632
## s.e. 0.1121 0.1156 0.2333
##
## sigma^2 estimated as 55: log likelihood = -5777.42, aic = 11562.84
##
## $degrees_of_freedom
## [1] 1685
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.2293 0.1121 2.0459 0.0409
## ma1 -0.0040 0.1156 -0.0345 0.9725
## constant -0.1632 0.2333 -0.6998 0.4842
##
## $AIC
## [1] 6.850025
##
## $AICc
## [1] 6.850033
##
## $BIC
## [1] 6.862895
sarima(res,2,0,2)
## initial value 3.668251
## iter 2 value 3.397839
## iter 3 value 3.199799
## iter 4 value 2.179445
## iter 5 value 2.163705
## iter 6 value 2.113291
## iter 7 value 2.054871
## iter 8 value 2.004008
## iter 9 value 1.987212
## iter 10 value 1.984492
## iter 11 value 1.984374
## iter 12 value 1.984344
## iter 13 value 1.984302
## iter 14 value 1.984285
## iter 15 value 1.984253
## iter 16 value 1.984176
## iter 17 value 1.983834
## iter 18 value 1.983465
## iter 19 value 1.982989
## iter 20 value 1.982839
## iter 21 value 1.982825
## iter 22 value 1.982823
## iter 23 value 1.982822
## iter 23 value 1.982822
## final value 1.982822
## converged
## initial value 2.012605
## iter 2 value 2.009563
## iter 3 value 2.007105
## iter 4 value 2.004772
## iter 5 value 2.003908
## iter 6 value 2.003455
## iter 7 value 2.003354
## iter 8 value 2.003336
## iter 9 value 2.003334
## iter 10 value 2.003333
## iter 11 value 2.003332
## iter 12 value 2.003328
## iter 13 value 2.003327
## iter 14 value 2.003325
## iter 15 value 2.003315
## iter 16 value 2.003301
## iter 17 value 2.003286
## iter 18 value 2.003279
## iter 19 value 2.003274
## iter 20 value 2.003268
## iter 21 value 2.003251
## iter 22 value 2.003206
## iter 23 value 2.003141
## iter 24 value 2.002975
## iter 25 value 2.002880
## iter 26 value 2.002877
## iter 27 value 2.002827
## iter 28 value 2.002654
## iter 29 value 2.002584
## iter 30 value 2.002538
## iter 31 value 2.002530
## iter 32 value 2.002343
## iter 33 value 2.002157
## iter 34 value 2.001336
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## iter 35 value 2.000813
## iter 36 value 2.000337
## iter 37 value 2.000167
## iter 38 value 1.999895
## iter 39 value 1.999424
## iter 40 value 1.998627
## iter 41 value 1.998342
## iter 42 value 1.998156
## iter 43 value 1.997892
## iter 44 value 1.996903
## iter 45 value 1.996391
## iter 46 value 1.995780
## iter 47 value 1.995326
## iter 48 value 1.994174
## iter 49 value 1.993988
## iter 50 value 1.993908
## iter 51 value 1.993904
## iter 52 value 1.993816
## iter 53 value 1.993696
## iter 54 value 1.993664
## iter 55 value 1.993644
## iter 56 value 1.993644
## iter 57 value 1.993643
## iter 58 value 1.993643
## iter 59 value 1.993643
## iter 60 value 1.993642
## iter 61 value 1.993641
## iter 62 value 1.993641
## iter 63 value 1.993641
## iter 63 value 1.993641
## iter 63 value 1.993641
## final value 1.993641
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = xmean, include.mean = FALSE, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 ma1 ma2 xmean
## 1.9289 -0.9329 -0.7238 -0.1400 2.3049
## s.e. 0.0234 0.0228 0.0343 0.0266 6.0476
##
## sigma^2 estimated as 53.78: log likelihood = -5763.85, aic = 11539.69
##
## $degrees_of_freedom
## [1] 1684
##
## $ttable
## Estimate SE t.value p.value
## ar1 1.9289 0.0234 82.3883 0.0000
## ar2 -0.9329 0.0228 -40.8923 0.0000
## ma1 -0.7238 0.0343 -21.1167 0.0000
## ma2 -0.1400 0.0266 -5.2635 0.0000
## xmean 2.3049 6.0476 0.3811 0.7032
##
## $AIC
## [1] 6.832264
##
## $AICc
## [1] 6.832285
##
## $BIC
## [1] 6.85156
sarima(res,1,1,0)
## initial value 2.028557
## iter 2 value 2.002495
## iter 3 value 2.002495
## iter 4 value 2.002495
## iter 5 value 2.002494
## iter 6 value 2.002494
## iter 6 value 2.002494
## final value 2.002494
## converged
## initial value 2.003706
## iter 2 value 2.003706
## iter 3 value 2.003706
## iter 4 value 2.003705
## iter 5 value 2.003705
## iter 6 value 2.003705
## iter 7 value 2.003705
## iter 7 value 2.003705
## iter 7 value 2.003705
## final value 2.003705
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 constant
## 0.2256 -0.1633
## s.e. 0.0237 0.2330
##
## sigma^2 estimated as 55: log likelihood = -5777.42, aic = 11560.84
##
## $degrees_of_freedom
## [1] 1686
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.2256 0.0237 9.5030 0.0000
## constant -0.1633 0.2330 -0.7006 0.4837
##
## $AIC
## [1] 6.848841
##
## $AICc
## [1] 6.848845
##
## $BIC
## [1] 6.858494
#From the residual plot, it is clear that model 1 and model 3 are more significant than other model
arima.fit<-Arima(res,order=c(1,1,1))
arima.res <-arima.fit$residuals
acf(arima.res^2)
aTSA::arch.test(arima(res,order=c(1,1,1)))
## ARCH heteroscedasticity test for residuals
## alternative: heteroscedastic
##
## Portmanteau-Q test:
## order PQ p.value
## [1,] 4 80 2.22e-16
## [2,] 8 135 0.00e+00
## [3,] 12 155 0.00e+00
## [4,] 16 225 0.00e+00
## [5,] 20 232 0.00e+00
## [6,] 24 234 0.00e+00
## Lagrange-Multiplier test:
## order LM p.value
## [1,] 4 9385.3 0.000
## [2,] 8 3498.0 0.000
## [3,] 12 2139.8 0.000
## [4,] 16 1097.0 0.000
## [5,] 20 893.0 0.000
## [6,] 24 30.2 0.143
#Residual square is stationary
garch.fit<- garch(arima.res,order=c(1,1),trace=F)
## Warning in sqrt(pred$e): NaNs produced
summary(garch.fit)
##
## Call:
## garch(x = arima.res, order = c(1, 1), trace = F)
##
## Model:
## GARCH(1,1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.139632 -0.599828 -0.002454 0.621582 4.597988
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 0.06294 0.02373 2.652 0.008 **
## a1 0.18613 0.01683 11.060 <2e-16 ***
## b1 0.83833 0.01277 65.649 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 263.03, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 1.1059, df = 1, p-value = 0.293
Check whether your model is similar to this model: https://rstudio-pubs-static.s3.amazonaws.com/372052_fdab30947be143dfb352094793078a95.html
Daily USD/HKD (US dollar to Hong Kong dollar) exchange rate from January 1, 2005 to March 7, 2006, altogether 431 days of data.
library(TSA)
## Warning: package 'TSA' was built under R version 4.1.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:PerformanceAnalytics':
##
## kurtosis, skewness
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
data(usd.hkd)
returns=ts(usd.hkd$r,freq=1)
plot(ts(usd.hkd$r,freq=1),type='l',xlab='Day',ylab='Return')
data1 <- ts(usd.hkd$r, freq=1)
#The plot shows that data is volatile with values starting to converge mean after 300th lag
acf(data1)
pacf(data1)
adf.test(data1)
## Warning in adf.test(data1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: data1
## Dickey-Fuller = -7.4383, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#Judging the stationary from the acf and pacf, the data is stationary as no values are crossing threshold and no differencing is required
You have to show all the steps to your approach. If you happen to choose two or more different models, compare there AIC to find the best model.
** How do you selected the correct ARMA(p,q) for the data?
** What do you see when you check the standardized residuals plot? Do you think further modeling is needed?
** If you decided on further modeling how do you choose the appropriate GARCH(p,q) model?
** What is your chosen best ARMA+GARCH model?
** What can you say about your final model? What can you say about it’s residuals by according to the Box Ljung test results from the model?
ARIMA.c=function(p1,p2,q1,q2,data){
temp=c()
d=1
i=1
temp= data.frame()
ls=matrix(rep(NA,6*30),nrow=30)
for (p in p1:p2)#
{
for(q in q1:q2)#
{
for(d in 0:2)#
{
if(p+d+q<=6)
{
model<- Arima(data,order=c(p,d,q))
ls[i,]= c(p,d,q,model$aic,model$bic,model$aicc)
i=i+1
print(i)
}
}
}
}
temp= as.data.frame(ls)
names(temp)= c("p","d","q","AIC","BIC","AICc")
temp
}
output <- ARIMA.c(0,2,0,2,data=data1)
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
output
## p d q AIC BIC AICc
## 1 0 0 0 -1919.315 -1911.183 -1919.287
## 2 0 1 0 -1621.705 -1617.641 -1621.696
## 3 0 2 0 -1163.070 -1159.009 -1163.061
## 4 0 0 1 -1917.343 -1905.145 -1917.287
## 5 0 1 1 -1907.787 -1899.660 -1907.759
## 6 0 2 1 -1608.870 -1600.747 -1608.841
## 7 0 0 2 -1920.273 -1904.008 -1920.179
## 8 0 1 2 -1905.837 -1893.646 -1905.781
## 9 0 2 2 -1878.539 -1866.354 -1878.482
## 10 1 0 0 -1917.337 -1905.139 -1917.281
## 11 1 1 0 -1712.434 -1704.306 -1712.405
## 12 1 2 0 -1376.600 -1368.477 -1376.572
## 13 1 0 1 -1917.448 -1901.183 -1917.354
## 14 1 1 1 -1905.826 -1893.635 -1905.770
## 15 1 2 1 -1698.657 -1686.472 -1698.600
## 16 1 0 2 -1919.790 -1899.459 -1919.648
## 17 1 1 2 -1908.186 -1891.930 -1908.091
## 18 1 2 2 -1885.728 -1869.482 -1885.634
## 19 2 0 0 -1920.556 -1904.291 -1920.462
## 20 2 1 0 -1759.732 -1747.541 -1759.676
## 21 2 2 0 -1474.160 -1461.976 -1474.104
## 22 2 0 1 -1920.538 -1900.207 -1920.397
## 23 2 1 1 -1909.541 -1893.286 -1909.447
## 24 2 2 1 -1745.271 -1729.025 -1745.177
## 25 2 0 2 -1930.385 -1905.989 -1930.187
## 26 2 1 2 -1911.351 -1891.032 -1911.209
## 27 2 2 2 -1890.007 -1869.699 -1889.865
## 28 NA NA NA NA NA NA
## 29 NA NA NA NA NA NA
## 30 NA NA NA NA NA NA
#The lowest AIC value is for model (2,0,2)
output[which.min(output$AIC),] #2,0,2
## p d q AIC BIC AICc
## 25 2 0 2 -1930.385 -1905.989 -1930.187
#The lowest BIC value is for model (0,0,0)
output[which.min(output$BIC),] #0,0,0
## p d q AIC BIC AICc
## 1 0 0 0 -1919.315 -1911.183 -1919.287
sarima(res,2,0,2)
## initial value 3.668251
## iter 2 value 3.397839
## iter 3 value 3.199799
## iter 4 value 2.179445
## iter 5 value 2.163705
## iter 6 value 2.113291
## iter 7 value 2.054871
## iter 8 value 2.004008
## iter 9 value 1.987212
## iter 10 value 1.984492
## iter 11 value 1.984374
## iter 12 value 1.984344
## iter 13 value 1.984302
## iter 14 value 1.984285
## iter 15 value 1.984253
## iter 16 value 1.984176
## iter 17 value 1.983834
## iter 18 value 1.983465
## iter 19 value 1.982989
## iter 20 value 1.982839
## iter 21 value 1.982825
## iter 22 value 1.982823
## iter 23 value 1.982822
## iter 23 value 1.982822
## final value 1.982822
## converged
## initial value 2.012605
## iter 2 value 2.009563
## iter 3 value 2.007105
## iter 4 value 2.004772
## iter 5 value 2.003908
## iter 6 value 2.003455
## iter 7 value 2.003354
## iter 8 value 2.003336
## iter 9 value 2.003334
## iter 10 value 2.003333
## iter 11 value 2.003332
## iter 12 value 2.003328
## iter 13 value 2.003327
## iter 14 value 2.003325
## iter 15 value 2.003315
## iter 16 value 2.003301
## iter 17 value 2.003286
## iter 18 value 2.003279
## iter 19 value 2.003274
## iter 20 value 2.003268
## iter 21 value 2.003251
## iter 22 value 2.003206
## iter 23 value 2.003141
## iter 24 value 2.002975
## iter 25 value 2.002880
## iter 26 value 2.002877
## iter 27 value 2.002827
## iter 28 value 2.002654
## iter 29 value 2.002584
## iter 30 value 2.002538
## iter 31 value 2.002530
## iter 32 value 2.002343
## iter 33 value 2.002157
## iter 34 value 2.001336
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## iter 35 value 2.000813
## iter 36 value 2.000337
## iter 37 value 2.000167
## iter 38 value 1.999895
## iter 39 value 1.999424
## iter 40 value 1.998627
## iter 41 value 1.998342
## iter 42 value 1.998156
## iter 43 value 1.997892
## iter 44 value 1.996903
## iter 45 value 1.996391
## iter 46 value 1.995780
## iter 47 value 1.995326
## iter 48 value 1.994174
## iter 49 value 1.993988
## iter 50 value 1.993908
## iter 51 value 1.993904
## iter 52 value 1.993816
## iter 53 value 1.993696
## iter 54 value 1.993664
## iter 55 value 1.993644
## iter 56 value 1.993644
## iter 57 value 1.993643
## iter 58 value 1.993643
## iter 59 value 1.993643
## iter 60 value 1.993642
## iter 61 value 1.993641
## iter 62 value 1.993641
## iter 63 value 1.993641
## iter 63 value 1.993641
## iter 63 value 1.993641
## final value 1.993641
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = xmean, include.mean = FALSE, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 ma1 ma2 xmean
## 1.9289 -0.9329 -0.7238 -0.1400 2.3049
## s.e. 0.0234 0.0228 0.0343 0.0266 6.0476
##
## sigma^2 estimated as 53.78: log likelihood = -5763.85, aic = 11539.69
##
## $degrees_of_freedom
## [1] 1684
##
## $ttable
## Estimate SE t.value p.value
## ar1 1.9289 0.0234 82.3883 0.0000
## ar2 -0.9329 0.0228 -40.8923 0.0000
## ma1 -0.7238 0.0343 -21.1167 0.0000
## ma2 -0.1400 0.0266 -5.2635 0.0000
## xmean 2.3049 6.0476 0.3811 0.7032
##
## $AIC
## [1] 6.832264
##
## $AICc
## [1] 6.832285
##
## $BIC
## [1] 6.85156
Example:
\(\phi(B) x_t = \delta + \theta(B) y_t\),
where \(\phi(B)=1-\phi_1 B-\dots-\phi_p B^p\); and \(\theta(B)=1+\theta_1 B+\dots+\theta_q B^q\)
\(y_t=\sigma_t \epsilon_t\)
\(var(y_t|y_{t-1})=\sigma ^2= \alpha_0+ \alpha_1(y_{t-1})^2 + \beta_1 \sigma_{t-1}^2\)
Fitting a GARCH model to the sp500w data: (Note that this is “weekly” data)
a). Plot Weekly closing returns of the SP 500 from 2003 to September, 2012 and comment on the volatality.
autoplot(sp500w)+ ggtitle('Weekly Growth Rate of S&P 500')
b). Plot the ACF and PACF of closing returns of the SP 500 data, What can you deduce from this graphs?
c). Plot ACF and PACF of squared returns of the SP 500 data, what do you see? Do you think ARCH/GARCH model is appropriate? Why? What is your suggested GARCH model for the data?
e). Fit your choise of the GARCH model to the data.
f). Fit AR(3)–GARCH(1,1) as the example discussed in example(note that the example was on monthly data) do you still observe the need to drop all the AR parameters.